# ============================================================================
# Confirmatory Factor Analysis (CFA) Script
# Supervisor Bottom-Line Mentality Study
# ============================================================================

# Load required libraries
library(readxl)      # For reading Excel files
library(lavaan)      # For CFA analysis
library(semTools)    # For additional SEM tools
library(dplyr)       # For data manipulation
library(psych)       # For Cronbach's alpha calculation

# ============================================================================
# Step 1: Read the Excel file
# ============================================================================


# Read the Excel file
data <- read_excel("C:/Users/wangm/Nutstore/1/AI & Big Data Group/领导底线心智BLM-工作拖延/Supplemental materials/CC/Supervisor Bottom-Line Mentality_Recoded.xlsx")

# Check the structure of the data
str(data)
head(data)

# Select only the variables needed for CFA
cfa_vars <- c("SBL1", "SBL2", "SBL3", "SBL4", 
              "RT1", "RT2", "RT3",
              "GC1", "GC2", "GC3", "GC4", "GC5",
              "JC1", "JC2", "JC3", "JC4", "JC5", "JC6",
              "WP1", "WP2", "WP3", "WP4", "WP5", "WP6", "WP7", "WP8")

# Check if all variables exist in the data
missing_vars <- setdiff(cfa_vars, names(data))
if(length(missing_vars) > 0) {
  warning(paste("The following variables are missing:", paste(missing_vars, collapse = ", ")))
  cfa_vars <- intersect(cfa_vars, names(data))
}

# Create a subset with only CFA variables
cfa_data <- data[, cfa_vars]

# Convert to numeric (in case variables are read as character)
cfa_data <- as.data.frame(lapply(cfa_data, as.numeric))

# Check for missing values
cat("\n=== Data Quality Check ===\n")
cat("Total missing values:", sum(is.na(cfa_data)), "\n")
cat("Missing values per variable:\n")
print(colSums(is.na(cfa_data)))

# Remove rows with any missing values (listwise deletion)
cfa_data_complete <- na.omit(cfa_data)
cat("\nOriginal sample size:", nrow(cfa_data), "\n")
cat("Complete cases:", nrow(cfa_data_complete), "\n")
cat("Removed cases:", nrow(cfa_data) - nrow(cfa_data_complete), "\n")

# Use complete cases for analysis
cfa_data <- cfa_data_complete

# Summary statistics
cat("\n=== Summary Statistics ===\n")
summary(cfa_data)

# ============================================================================
# Step 2: Define CFA Models
# ============================================================================

# Five-factor model: SBLM; RT; WGC; JC; WP
model_5factor <- '
  SBLM =~ SBL1 + SBL2 + SBL3 + SBL4
  RT   =~ RT1 + RT2 + RT3
  WGC  =~ GC1 + GC2 + GC3 + GC4 + GC5
  JC   =~ JC1 + JC2 + JC3 + JC4 + JC5 + JC6
  WP   =~ WP1 + WP2 + WP3 + WP4 + WP5 + WP6 + WP7 + WP8
'

# Four-factor model: SBLM; RT; WGC+JC; WP
model_4factor <- '
  SBLM =~ SBL1 + SBL2 + SBL3 + SBL4
  RT   =~ RT1 + RT2 + RT3
  WGC_JC =~ GC1 + GC2 + GC3 + GC4 + GC5 + JC1 + JC2 + JC3 + JC4 + JC5 + JC6
  WP   =~ WP1 + WP2 + WP3 + WP4 + WP5 + WP6 + WP7 + WP8
'

# Three-factor model: SBLM+RT; WGC+JC; WP
model_3factor <- '
  SBLM_RT =~ SBL1 + SBL2 + SBL3 + SBL4 + RT1 + RT2 + RT3
  WGC_JC  =~ GC1 + GC2 + GC3 + GC4 + GC5 + JC1 + JC2 + JC3 + JC4 + JC5 + JC6
  WP      =~ WP1 + WP2 + WP3 + WP4 + WP5 + WP6 + WP7 + WP8
'

# Two-factor model: SBLM+RT+WGC+JC; WP
model_2factor <- '
  SBLM_RT_WGC_JC =~ SBL1 + SBL2 + SBL3 + SBL4 + RT1 + RT2 + RT3 + 
                    GC1 + GC2 + GC3 + GC4 + GC5 + 
                    JC1 + JC2 + JC3 + JC4 + JC5 + JC6
  WP =~ WP1 + WP2 + WP3 + WP4 + WP5 + WP6 + WP7 + WP8
'

# Single-factor model: SBLM+RT+WGC+JC+WP
model_1factor <- '
  General =~ SBL1 + SBL2 + SBL3 + SBL4 + 
             RT1 + RT2 + RT3 + 
             GC1 + GC2 + GC3 + GC4 + GC5 + 
             JC1 + JC2 + JC3 + JC4 + JC5 + JC6 + 
             WP1 + WP2 + WP3 + WP4 + WP5 + WP6 + WP7 + WP8
'

# ============================================================================
# Step 3: Run CFA for each model
# ============================================================================

# Five-factor model
fit_5factor <- cfa(model_5factor, data = cfa_data, estimator = "MLR")
summary(fit_5factor, fit.measures = TRUE, standardized = TRUE)

# Four-factor model
fit_4factor <- cfa(model_4factor, data = cfa_data, estimator = "MLR")
summary(fit_4factor, fit.measures = TRUE, standardized = TRUE)

# Three-factor model
fit_3factor <- cfa(model_3factor, data = cfa_data, estimator = "MLR")
summary(fit_3factor, fit.measures = TRUE, standardized = TRUE)

# Two-factor model
fit_2factor <- cfa(model_2factor, data = cfa_data, estimator = "MLR")
summary(fit_2factor, fit.measures = TRUE, standardized = TRUE)

# Single-factor model
fit_1factor <- cfa(model_1factor, data = cfa_data, estimator = "MLR")
summary(fit_1factor, fit.measures = TRUE, standardized = TRUE)

# ============================================================================
# Step 4: Model Comparison
# ============================================================================

# Extract fit indices for all models
fit_indices <- function(fit) {
  fit_measures <- fitMeasures(fit, 
                               fit.measures = c("chisq", "df", "pvalue",
                                                "cfi", "tli", "rmsea", 
                                                "rmsea.ci.lower", "rmsea.ci.upper",
                                                "srmr", "aic", "bic"))
  return(fit_measures)
}

# Create a comparison table
comparison_table <- data.frame(
  Model = c("Five-factor", "Four-factor", "Three-factor", 
            "Two-factor", "Single-factor"),
  Chi_square = c(fitMeasures(fit_5factor, "chisq"),
                 fitMeasures(fit_4factor, "chisq"),
                 fitMeasures(fit_3factor, "chisq"),
                 fitMeasures(fit_2factor, "chisq"),
                 fitMeasures(fit_1factor, "chisq")),
  df = c(fitMeasures(fit_5factor, "df"),
         fitMeasures(fit_4factor, "df"),
         fitMeasures(fit_3factor, "df"),
         fitMeasures(fit_2factor, "df"),
         fitMeasures(fit_1factor, "df")),
  p_value = c(fitMeasures(fit_5factor, "pvalue"),
              fitMeasures(fit_4factor, "pvalue"),
              fitMeasures(fit_3factor, "pvalue"),
              fitMeasures(fit_2factor, "pvalue"),
              fitMeasures(fit_1factor, "pvalue")),
  CFI = c(fitMeasures(fit_5factor, "cfi"),
          fitMeasures(fit_4factor, "cfi"),
          fitMeasures(fit_3factor, "cfi"),
          fitMeasures(fit_2factor, "cfi"),
          fitMeasures(fit_1factor, "cfi")),
  TLI = c(fitMeasures(fit_5factor, "tli"),
          fitMeasures(fit_4factor, "tli"),
          fitMeasures(fit_3factor, "tli"),
          fitMeasures(fit_2factor, "tli"),
          fitMeasures(fit_1factor, "tli")),
  RMSEA = c(fitMeasures(fit_5factor, "rmsea"),
            fitMeasures(fit_4factor, "rmsea"),
            fitMeasures(fit_3factor, "rmsea"),
            fitMeasures(fit_2factor, "rmsea"),
            fitMeasures(fit_1factor, "rmsea")),
  RMSEA_CI_Lower = c(fitMeasures(fit_5factor, "rmsea.ci.lower"),
                     fitMeasures(fit_4factor, "rmsea.ci.lower"),
                     fitMeasures(fit_3factor, "rmsea.ci.lower"),
                     fitMeasures(fit_2factor, "rmsea.ci.lower"),
                     fitMeasures(fit_1factor, "rmsea.ci.lower")),
  RMSEA_CI_Upper = c(fitMeasures(fit_5factor, "rmsea.ci.upper"),
                     fitMeasures(fit_4factor, "rmsea.ci.upper"),
                     fitMeasures(fit_3factor, "rmsea.ci.upper"),
                     fitMeasures(fit_2factor, "rmsea.ci.upper"),
                     fitMeasures(fit_1factor, "rmsea.ci.upper")),
  SRMR = c(fitMeasures(fit_5factor, "srmr"),
           fitMeasures(fit_4factor, "srmr"),
           fitMeasures(fit_3factor, "srmr"),
           fitMeasures(fit_2factor, "srmr"),
           fitMeasures(fit_1factor, "srmr")),
  AIC = c(fitMeasures(fit_5factor, "aic"),
          fitMeasures(fit_4factor, "aic"),
          fitMeasures(fit_3factor, "aic"),
          fitMeasures(fit_2factor, "aic"),
          fitMeasures(fit_1factor, "aic")),
  BIC = c(fitMeasures(fit_5factor, "bic"),
          fitMeasures(fit_4factor, "bic"),
          fitMeasures(fit_3factor, "bic"),
          fitMeasures(fit_2factor, "bic"),
          fitMeasures(fit_1factor, "bic"))
)

# Round the values for better readability
comparison_table[, 2:ncol(comparison_table)] <- round(comparison_table[, 2:ncol(comparison_table)], 4)

# Print comparison table
cat("\n=== Model Comparison Table ===\n")
print(comparison_table)

# Format RMSEA with confidence interval
comparison_table$RMSEA_CI <- paste0(
  sprintf("%.4f", comparison_table$RMSEA), 
  " [", 
  sprintf("%.4f", comparison_table$RMSEA_CI_Lower), 
  ", ", 
  sprintf("%.4f", comparison_table$RMSEA_CI_Upper), 
  "]"
)

# Create a formatted table for reporting
formatted_table <- comparison_table[, c("Model", "Chi_square", "df", "p_value", 
                                        "CFI", "TLI", "RMSEA_CI", "SRMR", "AIC", "BIC")]
cat("\n=== Formatted Model Comparison Table ===\n")
print(formatted_table)

# Create table with format: Models | χ2 | df | χ2/df | CFI | TLI | RMSEA | SRMR
fit_summary_table <- data.frame(
  Models = c("Five-factor", "Four-factor", "Three-factor", 
             "Two-factor", "Single-factor"),
  χ2 = round(c(fitMeasures(fit_5factor, "chisq"),
               fitMeasures(fit_4factor, "chisq"),
               fitMeasures(fit_3factor, "chisq"),
               fitMeasures(fit_2factor, "chisq"),
               fitMeasures(fit_1factor, "chisq")), 2),
  df = c(fitMeasures(fit_5factor, "df"),
         fitMeasures(fit_4factor, "df"),
         fitMeasures(fit_3factor, "df"),
         fitMeasures(fit_2factor, "df"),
         fitMeasures(fit_1factor, "df")),
  χ2.df = round(c(fitMeasures(fit_5factor, "chisq") / fitMeasures(fit_5factor, "df"),
                  fitMeasures(fit_4factor, "chisq") / fitMeasures(fit_4factor, "df"),
                  fitMeasures(fit_3factor, "chisq") / fitMeasures(fit_3factor, "df"),
                  fitMeasures(fit_2factor, "chisq") / fitMeasures(fit_2factor, "df"),
                  fitMeasures(fit_1factor, "chisq") / fitMeasures(fit_1factor, "df")), 2),
  CFI = round(c(fitMeasures(fit_5factor, "cfi"),
                fitMeasures(fit_4factor, "cfi"),
                fitMeasures(fit_3factor, "cfi"),
                fitMeasures(fit_2factor, "cfi"),
                fitMeasures(fit_1factor, "cfi")), 3),
  TLI = round(c(fitMeasures(fit_5factor, "tli"),
                fitMeasures(fit_4factor, "tli"),
                fitMeasures(fit_3factor, "tli"),
                fitMeasures(fit_2factor, "tli"),
                fitMeasures(fit_1factor, "tli")), 3),
  RMSEA = round(c(fitMeasures(fit_5factor, "rmsea"),
                 fitMeasures(fit_4factor, "rmsea"),
                 fitMeasures(fit_3factor, "rmsea"),
                 fitMeasures(fit_2factor, "rmsea"),
                 fitMeasures(fit_1factor, "rmsea")), 3),
  SRMR = round(c(fitMeasures(fit_5factor, "srmr"),
                 fitMeasures(fit_4factor, "srmr"),
                 fitMeasures(fit_3factor, "srmr"),
                 fitMeasures(fit_2factor, "srmr"),
                 fitMeasures(fit_1factor, "srmr")), 3)
)

cat("\n=== Fit Indices Summary Table ===\n")
cat("Format: Models | χ2 | df | χ2/df | CFI | TLI | RMSEA | SRMR\n")
print(fit_summary_table)

# Save comparison table to CSV
write.csv(comparison_table, "CFA_Model_Comparison.csv", row.names = FALSE)
write.csv(formatted_table, "CFA_Model_Comparison_Formatted.csv", row.names = FALSE)
write.csv(fit_summary_table, "CFA_Fit_Indices_Summary.csv", row.names = FALSE)

# ============================================================================
# Step 5: Chi-square difference tests
# ============================================================================

# Compare nested models using chi-square difference test
anova(fit_5factor, fit_4factor, fit_3factor, fit_2factor, fit_1factor)

# ============================================================================
# Step 6: Additional diagnostics for the best model
# ============================================================================

# Extract standardized loadings for five-factor model
cat("\n=== Five-Factor Model: Standardized Loadings ===\n")
standardized_solution_5factor <- standardizedSolution(fit_5factor)
print(standardized_solution_5factor)

# Save standardized loadings
write.csv(standardized_solution_5factor, "CFA_5Factor_Standardized_Loadings.csv", row.names = FALSE)

# Factor correlations for five-factor model
cat("\n=== Five-Factor Model: Factor Correlations ===\n")
factor_correlations <- lavInspect(fit_5factor, "cor.lv")
print(round(factor_correlations, 3))

# Save factor correlations
write.csv(round(factor_correlations, 4), "CFA_5Factor_Correlations.csv")

# ============================================================================
# Step 6a: Reliability and Validity Analysis
# ============================================================================

cat("\n=== Reliability and Validity Analysis ===\n")

# Extract standardized loadings
std_loadings <- standardizedSolution(fit_5factor)
std_loadings <- std_loadings[std_loadings$op == "=~", ]

# Get factor names and items
factor_names <- c("SBLM", "RT", "WGC", "JC", "WP")
factor_labels <- c("Supervisor Bottom-Line Mentality", 
                   "Perceived Red Tape", 
                   "Work Goal Clarity", 
                   "Job Calling", 
                   "Work Procrastination")

# Item labels for each factor
item_mapping <- list(
  SBLM = c("SBL1", "SBL2", "SBL3", "SBL4"),
  RT = c("RT1", "RT2", "RT3"),
  WGC = c("GC1", "GC2", "GC3", "GC4", "GC5"),
  JC = c("JC1", "JC2", "JC3", "JC4", "JC5", "JC6"),
  WP = c("WP1", "WP2", "WP3", "WP4", "WP5", "WP6", "WP7", "WP8")
)

# Calculate reliability using new functions
# Composite Reliability (CR) using compRelSEM
CR_values <- compRelSEM(fit_5factor, return.total = FALSE)

# Average Variance Extracted (AVE) using AVE function
AVE_values <- AVE(fit_5factor)

# Cronbach's Alpha (using psych package)

# Calculate Cronbach's Alpha for each factor
CA_values <- sapply(factor_names, function(factor) {
  items <- item_mapping[[factor]]
  item_data <- cfa_data[, items]
  alpha_result <- psych::alpha(item_data, check.keys = FALSE)
  return(alpha_result$total$raw_alpha)
})

# Create reliability and validity table
reliability_validity_table <- data.frame()

for (i in 1:length(factor_names)) {
  factor <- factor_names[i]
  items <- item_mapping[[factor]]
  
  for (j in 1:length(items)) {
    item <- items[j]
    loading <- std_loadings$est.std[std_loadings$lhs == factor & std_loadings$rhs == item]
    
    # Add row to table
    if (j == 1) {
      # First item: include variable name and all statistics
      reliability_validity_table <- rbind(
        reliability_validity_table,
        data.frame(
          Variables = factor_labels[i],
          Items = item,
          Loadings = round(loading, 3),
          CA = round(CA_values[factor], 3),
          CR = round(CR_values[factor], 3),
          AVE = round(AVE_values[factor], 3),
          stringsAsFactors = FALSE
        )
      )
    } else {
      # Subsequent items: only item name and loading
      reliability_validity_table <- rbind(
        reliability_validity_table,
        data.frame(
          Variables = "",
          Items = item,
          Loadings = round(loading, 3),
          CA = "",
          CR = "",
          AVE = "",
          stringsAsFactors = FALSE
        )
      )
    }
  }
}

cat("\nReliability and Validity Table:\n")
print(reliability_validity_table)

# Save reliability and validity table
write.csv(reliability_validity_table, "Reliability_Validity_Analysis.csv", row.names = FALSE)

# Summary by factor
reliability_summary <- data.frame(
  Variables = factor_labels,
  CA = round(CA_values[factor_names], 3),
  CR = round(CR_values[factor_names], 3),
  AVE = round(AVE_values[factor_names], 3)
)
cat("\nReliability Summary by Factor:\n")
print(reliability_summary)
write.csv(reliability_summary, "Reliability_Summary.csv", row.names = FALSE)

# ============================================================================
# Step 6b: Descriptive Statistics and Correlations
# ============================================================================

cat("\n=== Descriptive Statistics and Correlations ===\n")

# Calculate factor scores for the five main variables
# Using mean scores (composite scores) for simplicity and reliability
cat("Calculating composite scores (mean scores) for main variables...\n")
data$SBL <- rowMeans(data[, c("SBL1", "SBL2", "SBL3", "SBL4")], na.rm = TRUE)
data$RT <- rowMeans(data[, c("RT1", "RT2", "RT3")], na.rm = TRUE)
data$GC <- rowMeans(data[, c("GC1", "GC2", "GC3", "GC4", "GC5")], na.rm = TRUE)
data$JC <- rowMeans(data[, c("JC1", "JC2", "JC3", "JC4", "JC5", "JC6")], na.rm = TRUE)
data$WP <- rowMeans(data[, c("WP1", "WP2", "WP3", "WP4", "WP5", "WP6", "WP7", "WP8")], na.rm = TRUE)

# Define variables for descriptive statistics and correlations
# Control variables and main variables
desc_vars <- c("Gender", "Age", "Edu", "Rank", "SStenure",
               "CS", "HS",  # Challenge Stressors, Hindrance Stressors
               "SBL", "GC", "JC", "WP", "RT")

# Check which variables exist in the data
available_vars <- intersect(desc_vars, names(data))
missing_desc_vars <- setdiff(desc_vars, names(data))
if(length(missing_desc_vars) > 0) {
  warning(paste("The following variables are missing for descriptive stats:", 
                paste(missing_desc_vars, collapse = ", ")))
  cat("Available variables:", paste(available_vars, collapse = ", "), "\n")
}

# Select available variables
desc_data <- data[, available_vars, drop = FALSE]

# Convert to numeric (in case variables are read as character)
desc_data <- as.data.frame(lapply(desc_data, function(x) {
  if(is.numeric(x)) return(x)
  as.numeric(as.character(x))
}))

# Remove rows with missing values
desc_data_complete <- na.omit(desc_data)
cat("Sample size for descriptive statistics:", nrow(desc_data_complete), "\n")

# Calculate means and standard deviations
means <- round(sapply(desc_data_complete, mean, na.rm = TRUE), 2)
sds <- round(sapply(desc_data_complete, sd, na.rm = TRUE), 2)

# Calculate correlation matrix and significance
cor_matrix <- cor(desc_data_complete, use = "complete.obs")

# Calculate p-values for correlations
cor_test <- function(x, y) {
  test <- cor.test(x, y)
  return(test$p.value)
}

cor_p_matrix <- matrix(NA, nrow = ncol(desc_data_complete), ncol = ncol(desc_data_complete))
colnames(cor_p_matrix) <- colnames(desc_data_complete)
rownames(cor_p_matrix) <- colnames(desc_data_complete)

for (i in 1:ncol(desc_data_complete)) {
  for (j in 1:ncol(desc_data_complete)) {
    if (i != j) {
      cor_p_matrix[i, j] <- cor.test(desc_data_complete[, i], desc_data_complete[, j])$p.value
    } else {
      cor_p_matrix[i, j] <- 1
    }
  }
}

# Create descriptive statistics table
desc_stats_table <- data.frame(
  Variables = names(means),
  M = means,
  SD = sds,
  stringsAsFactors = FALSE
)

cat("\nDescriptive Statistics:\n")
print(desc_stats_table)
write.csv(desc_stats_table, "Descriptive_Statistics.csv", row.names = FALSE)

# Create correlation table with means and SDs (lower triangle with significance)
n_vars <- length(available_vars)
cor_table <- matrix(NA, nrow = n_vars, ncol = n_vars + 2)
colnames(cor_table) <- c("Variables", "M", "SD", available_vars)
cor_table[, 1] <- available_vars
cor_table[, 2] <- sprintf("%.2f", means[available_vars])
cor_table[, 3] <- sprintf("%.2f", sds[available_vars])

# Fill in correlations (lower triangle) with significance markers
for (i in 1:n_vars) {
  for (j in 1:n_vars) {
    if (i > j) {
      # Lower triangle: show correlation with significance
      cor_val <- cor_matrix[available_vars[i], available_vars[j]]
      p_val <- cor_p_matrix[available_vars[i], available_vars[j]]
      
      # Add significance markers
      sig_marker <- ""
      if (p_val < 0.001) {
        sig_marker <- "***"
      } else if (p_val < 0.01) {
        sig_marker <- "**"
      } else if (p_val < 0.05) {
        sig_marker <- "*"
      }
      
      cor_table[i, j + 3] <- paste0(sprintf("%.3f", cor_val), sig_marker)
    } else if (i == j) {
      # Diagonal: show 1.000
      cor_table[i, j + 3] <- "1.000"
    } else {
      # Upper triangle: blank
      cor_table[i, j + 3] <- ""
    }
  }
}

# Convert to data frame
cor_table_df <- as.data.frame(cor_table)

cat("\nMeans, Standard Deviations, and Correlations:\n")
cat("Note: * p < 0.05, ** p < 0.01, *** p < 0.001\n")
print(cor_table_df)
write.csv(cor_table_df, "Means_SD_Correlations.csv", row.names = FALSE)

# Also create a formatted correlation matrix (full matrix)
full_cor_matrix <- round(cor_matrix, 3)
cat("\nFull Correlation Matrix:\n")
print(full_cor_matrix)
write.csv(full_cor_matrix, "Correlation_Matrix.csv")

# Save correlation p-values
write.csv(round(cor_p_matrix, 4), "Correlation_Pvalues.csv")

# ============================================================================
# Step 7: Export results
# ============================================================================

# Save workspace
save.image("CFA_Results.RData")

# Print summary
cat("\n========================================\n")
cat("CFA Analysis Complete\n")
cat("N =", nrow(cfa_data), "\n")
cat("========================================\n")

